
****************************************************;
 * Price Dispersion Other countries;
****************************************************;
#d;
use "${analysis}/appended_prices_long", clear;

* Residual price dispersion in the pooled secondary price datasets.
* For every sample (all products, maize, fertilizer) and every winsorized
* outcome (log_retail_price_w5, log_retail_price_w10) the log retail price is
* regressed on month, year, country, product and dataset fixed effects with
* standard errors clustered by location. The estimation sample size and the
* standard deviation of the residuals lying strictly between their 5th and
* 95th percentiles are written to one row of description, residual_sd and N;

* Create the result holders only when they are not already in the data, so
* the replace statements below cannot stop on a missing variable;
foreach v in N residual_sd {;
	capture confirm variable `v';
	if _rc {;
		gen `v'=.;
	};
};
capture confirm variable description;
if _rc {;
	gen description="";
};

* Sample restrictions and row labels, keyed by a short sample name.
* The all-products sample has no restriction, hence the empty macro;
local if_all "";
local if_maize `"if (product=="MAIZE" | product=="MAIZE GRAIN" | product=="MAIZE (WHITE)" | product=="MAIZE (LOCAL)")"';
local if_fert `"if (dataset=="AMITSA" | dataset=="Africafertilizer.org")"';
local label_all "All products";
local label_maize "Maize only";
local label_fert "Fertilizer";

* row counts the result rows filled so far and is reused for the export;
local row 0;
foreach s in all maize fert {;
	foreach w in 5 10 {;
		local ++row;
		reghdfe log_retail_price_w`w' `if_`s'', absorb(month year country product dataset) vce(cluster location) resid;
		quietly replace N=e(N) if _n==`row';
		quietly predict r if e(sample), resid;
		* The detailed summary supplies r(p5) and r(p95) for the trimmed
		* summary on the next line, whose r(sd) is the reported dispersion;
		sum r, d;
		sum r if r>r(p5) & r<r(p95);
		quietly replace residual_sd=r(sd) if _n==`row';
		quietly drop r;
		quietly replace description="`label_`s'' - `w'% winsorize" if _n==`row';
	};
};

* Export exactly the rows that were filled above;
outsheet description residual_sd N if _n<=`row' using "${pool_results}/PRICES_residual_price_variation_secondary_datasets.out", replace;





********************************;
*Tanzania Price Dispersion;
********************************;

#d;
use "${analysis}/price_analysis_dataset", clear;
gen month=month(survey_date);
gen year=year(survey_date);

gen log_retail_price_std=log(retail_price_std);

* Winsorize the log price at the 5th and 95th percentiles separately within
* each product. product_group numbers the distinct values of product_name;
gen log_retail_price_std_w5=log_retail_price_std;
egen product_group=group(product_name);
sum product_group;
local P=r(max);

* forvalues stops with an error when P is missing, whereas a manual while
* counter would never terminate because 1 <= . evaluates to true;
quietly forvalues p = 1/`P' {;
	sum log_retail_price_std if product_group==`p', d;
	replace log_retail_price_std_w5=r(p95) if log_retail_price_std>r(p95) & log_retail_price_std!=. & product_group==`p';
	replace log_retail_price_std_w5=r(p5) if log_retail_price_std<r(p5) & product_group==`p';
};

* Result holders - one row per regression below;
foreach v in N sd_residual {;
	gen `v'=.;
};
gen regression="";



*All products - month, year and product fixed effects, clustered by market.
*The reported dispersion is the standard deviation of the residuals lying
*strictly between their 5th and 95th percentiles;
reghdfe log_retail_price_std_w5, absorb(month year product_name) vce(cluster market) resid;
replace N=e(N) if _n==1;
quietly predict residual if e(sample), resid;
sum residual, d;
sum residual if residual>r(p5) & residual<r(p95);
replace sd_residual=r(sd) if _n==1;
replace regression ="all products - Tanzania" if _n==1;
drop residual;


*Maize - same statistic on the maize rows, month and year fixed effects only;
reghdfe log_retail_price_std_w5 if product_name=="maize", absorb(month year) vce(cluster market) resid;
replace N=e(N) if _n==2;
quietly predict residual if e(sample), resid;
sum residual, d;
sum residual if residual>r(p5) & residual<r(p95);
replace sd_residual=r(sd) if _n==2;
replace regression ="maize - Tanzania" if _n==2;
drop residual;

* Only rows 1 and 2 hold results. Without the restriction every observation
* of the price dataset would be written to the output file as a blank row;
outsheet regression sd_residual N if _n<=2 using "${pool_results}/PRICES_calling_protocol_price_dispersion", replace;


*fertilizer - village-level price dispersion built from the agrovet data;
use "${analysis}/agrovet_prices&sales_long", clear;

* For each fertilizer type take the largest price_loc1_17 per agrovet among
* the rows whose type contains the matching string. Urea is built first and
* DAP second;
foreach t in Urea DAP {;
	local stub = lower("`t'");
	gen x=price_loc1_17 if strpos(type, "`t'");
	egen `stub'_price_17=max(x), by(agrovet_id);
	drop x;
};

* Reduce to one row per agrovet before merging;
keep agrovet_id urea_price_17 dap_price_17;
sort agrovet_id;
keep if agrovet_id!=agrovet_id[_n-1];
tempfile temp;
save `temp';

* Attach the location identifiers and keep matched agrovets only;
use "${analysis}/agrovet_merged", clear;
keep agrovet_id village_name ward district market;
merge 1:1 agrovet_id using `temp';
keep if _merge==3;


* Move the variety name to the end of the variable name so that reshape can
* use it as the j suffix;
foreach f in dap urea {;
	rename `f'_price_17 price_17_`f';
};
reshape long price_17_, i(agrovet_id village_name market ward district) j(fert_variety) string;
rename price_17_ price_17;
gen log_price_17=log(price_17);

* Lowest log price per village and fertilizer variety;
collapse (min) log_price_17, by(village_name market ward district fert_variety);

* Winsorize at the 5th and 95th percentiles within each variety;
gen log_price_17_w5=log_price_17;
foreach f in urea dap {;
	sum log_price_17 if fert_variety=="`f'", d;
	replace log_price_17_w5=r(p95) if log_price_17>r(p95) & log_price_17!=. & fert_variety=="`f'";
	replace log_price_17_w5=r(p5) if log_price_17<r(p5) & log_price_17!=. & fert_variety=="`f'";
};

* Variety fixed effects, clustered by village. Results are stored in row 1;
reghdfe log_price_17_w5, absorb(fert_variety) vce(cluster village_name) resid;
quietly gen N=e(N) if _n==1;
quietly gen description="price dispersion (village level, constructed from agrovets)" if _n==1;
quietly predict r if e(sample), resid;
* The detailed summary supplies r(p5) and r(p95) for the trimmed summary on
* the next line, whose r(sd) is the reported dispersion;
sum r, d;
sum r if r>r(p5) & r<r(p95);
quietly gen residual_sd=r(sd) if _n==1; 
quietly drop r;
outsheet description residual_sd N if _n==1 using "${pool_results}/PRICES_agrovet_price_dispersion.out", replace;
